《第26天》YOLO訓練流程與資料集COCO json格式

2022 iThome 鐵人賽

DAY 26

AI & Data

Object Detection and Image Processing with Python系列第 26 篇

14th鐵人賽

midnightla

團隊大腦已超載

2022-10-11 14:01:24

12229 瀏覽

分享至

前言

使用LabelImg標註獲得PascalVOC(xml檔)

範例

欲轉換成COCO格式(json檔)

範例

COCO格式

COCO數據集是一個具有大規模目標檢測、影像分割和語意標註的數據集。
COCO數據集有5種標註類型，以json檔儲存，如下。

2.1 Object Instances(物件偵測)
2.2 Keypoint Detection(關鍵點檢測)
2.3 Image Captioning(圖像描述)
2.4 Stuff Segmentation(語意分割)
2.5 Panoptic Segmentation(全景分割)
YOLO模型主要任務為Object Detection，故，我們採用Object Instance創建COCO(json檔)。
Object Instance標註格式

4.1 以JSON排版工具呈現

4.2 info：對於資料集的描述。如：中英數OCR資料集、創立年份、提供者名稱、資料集版本。

4.3 images：對於圖片的描述。如：圖片編號、檔案名稱、圖片尺寸。

4.4 annotations：圖片標籤資訊。如：bbox中索引0~3的四個值，依序代表bounding box左上角的x座標、y座標，以及寬度與高度。

4.5 categories：紀錄物件的標籤類別。

PascalVOC轉COCO格式與分配資料集

流程與Python函式

1.1 xml轉換為annos.txt

annos.txt中，每行為imageName、classId、xMin、yMin、xMax、yMax。
一個bbox對應一行(classId依照COCO格式慣例，從1起算)

def get(root, name):
    """Return whatever ``root.findall(name)`` yields.

    Args:
        root: Object exposing ``findall`` (e.g. an ElementTree element).
        name: Tag or path forwarded unchanged to ``findall``.

    Returns:
        The result of ``root.findall(name)``.
    """
    matches = root.findall(name)
    return matches

def get_and_check(root, name, length):
    """Find the elements named ``name`` under ``root`` and validate the count.

    Args:
        root: Object exposing ``findall`` and ``tag`` (e.g. an ElementTree
            element).
        name: Tag or path forwarded to ``root.findall``.
        length: Expected number of matches. A value <= 0 skips the exact
            count check (at least one match is still required).

    Returns:
        The single matching element when ``length == 1``; otherwise the
        list of matches.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is
            positive and the number of matches differs from it.
    """
    found = root.findall(name)
    if not found:
        raise NotImplementedError(
            'Can not find {} in {}.'.format(name, root.tag))
    if length > 0 and len(found) != length:
        # The message must stay inside one string literal: a quoted string
        # split across physical lines is a SyntaxError.
        raise NotImplementedError(
            'The size of {} is supposed to be {}, but is {}.'
            .format(name, length, len(found)))
    if length == 1:
        return found[0]
    return found

def transfer_xml_to_annos(xmlPath, saveDir, classes=None):
    """Append the boxes of PascalVOC XML files to an annos text file.

    One line is written per ``<object>`` element, in the form
    ``filename classId xMin yMin xMax yMax``. ``classId`` is the 1-based
    position of the object's ``name`` in ``classes``; ``xMin``/``yMin`` are
    the XML values minus one (never below 0), ``xMax``/``yMax`` are copied.

    Args:
        xmlPath: Sequence of XML file paths (``len()`` is used for the
            progress output).
        saveDir: Path of the output text file (a file, despite the name).
            It is opened in append mode, so existing content is kept.
        classes: Ordered list of class names. When omitted, the
            module-level ``classes`` variable is used, which is how the
            surrounding script provides it.

    Raises:
        NameError: If ``classes`` is omitted and no module-level ``classes``
            exists.
        ValueError: If an object's name is not listed in ``classes``.
        NotImplementedError: Propagated from ``get_and_check`` when a
            required XML element is missing or duplicated.
    """
    if classes is None:
        # Backward compatibility: the two-argument call relies on a global.
        try:
            classes = globals()['classes']
        except KeyError:
            raise NameError("name 'classes' is not defined") from None

    total = len(xmlPath)
    # Open the output once instead of once per XML file.
    with open(saveDir, "a") as bbox:
        for n, xml in enumerate(xmlPath, 1):
            root = ET.parse(xml).getroot()
            # Image file name recorded inside the XML.
            filename = get_and_check(root, 'filename', 1).text

            # One output line per annotated box.
            for obj in get(root, 'object'):
                category = get_and_check(obj, 'name', 1).text
                if category not in classes:
                    raise ValueError(
                        'Unknown category {!r} in {}'.format(category, xml))
                label_index = str(classes.index(category) + 1)

                bndbox = get_and_check(obj, 'bndbox', 1)
                # Shift the minimum corner by one pixel, but never let it
                # become negative.
                xmin = max(0, int(get_and_check(bndbox, 'xmin', 1).text) - 1)
                ymin = max(0, int(get_and_check(bndbox, 'ymin', 1).text) - 1)
                xmax = int(get_and_check(bndbox, 'xmax', 1).text)
                ymax = int(get_and_check(bndbox, 'ymax', 1).text)

                bbox.write(filename + ' {} {} {} {} {}\n'
                           .format(label_index, xmin, ymin, xmax, ymax))
            print('※ 第{:3d}個xml檔案完成'.format(n))
            print('※ 剩{:3d}個需轉換'.format(total - n))
            print("-" * 35)

1.2 將圖片依照比例分配train與val資料集

def train_val_split(source, ratio):
    """Randomly split the entries of ``<source>/images`` into two lists.

    Args:
        source: Dataset root directory containing an ``images`` folder.
        ratio: Fraction of the entries that goes into the first list; the
            count is ``int(number_of_entries * ratio)``.

    Returns:
        A ``(train_list, val_list)`` tuple of file names (not full paths).
    """
    names = os.listdir(os.path.join(source, 'images'))
    # Shuffle in place so the cut below yields a random partition.
    random.shuffle(names)
    cut = int(len(names) * ratio)
    return names[:cut], names[cut:]

1.3 將標籤轉換成coco格式，並以json格式存檔。資料夾包含以下檔案。

images(圖片資料夾)
annos.txt(bbox標記)
classes.txt(類別清單)
annotations(儲存json的資料夾)。

def transfer_and_save_coco(source, split_list, dataset, phase):
    """Fill ``dataset`` with COCO records for ``split_list`` and save as JSON.

    Reads ``<source>/annos.txt`` (one box per line:
    ``imageName classId xMin yMin xMax yMax``), appends one entry to
    ``dataset['images']`` per file name in ``split_list`` and one entry to
    ``dataset['annotations']`` per annos line belonging to that image, then
    writes ``dataset`` to ``<source>/annotations/<phase>.json``.

    Args:
        source: Dataset root containing ``annos.txt`` and ``images``.
        split_list: Image file names to include in this split.
        dataset: Dict with list-valued ``'images'`` and ``'annotations'``
            keys; it is mutated in place.
        phase: Base name of the JSON file (without extension).

    Raises:
        OSError: If ``cv2.imread`` cannot read one of the images.
    """
    # Index the annotation lines by image name once, instead of rescanning
    # and re-splitting the whole file for every image.
    boxes_by_image = {}
    with open(os.path.join(source, 'annos.txt')) as tr:
        for line_no, line in enumerate(tr):
            parts = line.strip().split()
            if not parts:
                # A blank line has no image name to match against.
                continue
            boxes_by_image.setdefault(parts[0], []).append((line_no, parts))

    for k, index in enumerate(split_list):
        # Read the image with OpenCV only to obtain its size.
        image_path = os.path.join(source, 'images', index)
        im = cv2.imread(image_path)
        if im is None:
            # imread signals failure by returning None rather than raising.
            raise OSError('Can not read image: {}'.format(image_path))
        img_height, img_width = im.shape[:2]

        dataset['images'].append({'file_name': index,
                                  'id': k,
                                  'width': img_width,
                                  'height': img_height})

        for line_no, parts in boxes_by_image.get(index, ()):
            cls_id = parts[1]
            x1, y1, x2, y2 = (float(value) for value in parts[2:6])
            box_width = max(0, x2 - x1)
            box_height = max(0, y2 - y1)
            dataset['annotations'].append({
                'area': box_width * box_height,
                # COCO boxes are [x_min, y_min, width, height].
                'bbox': [x1, y1, box_width, box_height],
                'category_id': int(cls_id),
                # 1-based: pycocotools' COCOeval stores matched annotation
                # ids in zero-initialised arrays, so an id of 0 would be
                # indistinguishable from "no match".
                'id': line_no + 1,
                'image_id': k,
                'iscrowd': 0,
                # Only needed for segmentation; a rectangle mask would be
                # [[x1, y1, x2, y1, x2, y2, x1, y2]] (clockwise from the
                # top-left corner), together with 'ignore': 0.
                'segmentation': []
            })

        print('   {} images handled'.format(k + 1))

    # Save the JSON file.
    folder = os.path.join(source, 'annotations')
    os.makedirs(folder, exist_ok=True)
    json_name = os.path.join(folder, '{}.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)

# 生成train與val之coco格式json檔
def txt_to_coco_json(source, classes, split_list, phase):
    """Build an empty COCO "instances" dataset and hand it to the converter.

    Args:
        source: Dataset root directory, forwarded to
            ``transfer_and_save_coco``.
        classes: Ordered class names; category ids are assigned from 1.
        split_list: Image file names belonging to this split.
        phase: Name used in the progress output and forwarded to
            ``transfer_and_save_coco``.
    """
    info = {'description': '', 'url': '', 'version': '1.0',
            'year': 2022, 'contributor': 'James',
            'date_created': ''}

    # Map every class name to its 1-based category id.
    categories = [
        {'id': class_id, 'name': class_name, 'supercategory': 'mark'}
        for class_id, class_name in enumerate(classes, 1)
    ]

    # Container for image info and labels of the whole split.
    dataset = {'info': info,
               'categories': categories,
               'annotations': [],
               'images': [],
               'type': 'instances'}

    # Convert the split and store it as JSON.
    print('※ 開始轉換{}'.format(phase))
    transfer_and_save_coco(source, split_list, dataset, phase)
    print('※ {}.json Done'.format(phase))

1.4 移動圖片到train與val資料夾

def split_images_to_train_and_val(source, train_list, val_list):
    """Move images from ``<source>/images`` into the two split folders.

    Args:
        source: Dataset root directory.
        train_list: File names to move into ``<source>/train2017``.
        val_list: File names to move into ``<source>/val2017``.
    """
    plan = (('train2017', train_list), ('val2017', val_list))

    # Make sure both target folders exist before anything is moved.
    for subdir, _ in plan:
        target = os.path.join(source, subdir)
        if not os.path.exists(target):
            os.makedirs(target)

    # Move the training images first, then the validation images.
    for subdir, names in plan:
        destination = os.path.join(source, subdir, '')
        for name in names:
            shutil.move(source + '/images/' + name, destination)
    print('移動圖片到train與val資料夾 Done')

完整程式碼

import shutil
import random
import json
import cv2
import os
import xml.etree.ElementTree as ET

# ----------------------------------Step1----------------------------------
def get(root, name):
    """Return whatever ``root.findall(name)`` yields.

    Args:
        root: Object exposing ``findall`` (e.g. an ElementTree element).
        name: Tag or path forwarded unchanged to ``findall``.

    Returns:
        The result of ``root.findall(name)``.
    """
    matches = root.findall(name)
    return matches

def get_and_check(root, name, length):
    """Find the elements named ``name`` under ``root`` and validate the count.

    Args:
        root: Object exposing ``findall`` and ``tag`` (e.g. an ElementTree
            element).
        name: Tag or path forwarded to ``root.findall``.
        length: Expected number of matches. A value <= 0 skips the exact
            count check (at least one match is still required).

    Returns:
        The single matching element when ``length == 1``; otherwise the
        list of matches.

    Raises:
        NotImplementedError: If nothing matches, or if ``length`` is
            positive and the number of matches differs from it.
    """
    found = root.findall(name)
    if not found:
        raise NotImplementedError(
            'Can not find {} in {}.'.format(name, root.tag))
    if length > 0 and len(found) != length:
        # The message must stay inside one string literal: a quoted string
        # split across physical lines is a SyntaxError.
        raise NotImplementedError(
            'The size of {} is supposed to be {}, but is {}.'
            .format(name, length, len(found)))
    if length == 1:
        return found[0]
    return found

def transfer_xml_to_annos(xmlPath, saveDir, classes=None):
    """Append the boxes of PascalVOC XML files to an annos text file.

    One line is written per ``<object>`` element, in the form
    ``filename classId xMin yMin xMax yMax``. ``classId`` is the 1-based
    position of the object's ``name`` in ``classes``; ``xMin``/``yMin`` are
    the XML values minus one (never below 0), ``xMax``/``yMax`` are copied.

    Args:
        xmlPath: Sequence of XML file paths (``len()`` is used for the
            progress output).
        saveDir: Path of the output text file (a file, despite the name).
            It is opened in append mode, so existing content is kept.
        classes: Ordered list of class names. When omitted, the
            module-level ``classes`` variable is used, which is how the
            surrounding script provides it.

    Raises:
        NameError: If ``classes`` is omitted and no module-level ``classes``
            exists.
        ValueError: If an object's name is not listed in ``classes``.
        NotImplementedError: Propagated from ``get_and_check`` when a
            required XML element is missing or duplicated.
    """
    if classes is None:
        # Backward compatibility: the two-argument call relies on a global.
        try:
            classes = globals()['classes']
        except KeyError:
            raise NameError("name 'classes' is not defined") from None

    total = len(xmlPath)
    # Open the output once instead of once per XML file.
    with open(saveDir, "a") as bbox:
        for n, xml in enumerate(xmlPath, 1):
            root = ET.parse(xml).getroot()
            # Image file name recorded inside the XML.
            filename = get_and_check(root, 'filename', 1).text

            # One output line per annotated box.
            for obj in get(root, 'object'):
                category = get_and_check(obj, 'name', 1).text
                if category not in classes:
                    raise ValueError(
                        'Unknown category {!r} in {}'.format(category, xml))
                label_index = str(classes.index(category) + 1)

                bndbox = get_and_check(obj, 'bndbox', 1)
                # Shift the minimum corner by one pixel, but never let it
                # become negative.
                xmin = max(0, int(get_and_check(bndbox, 'xmin', 1).text) - 1)
                ymin = max(0, int(get_and_check(bndbox, 'ymin', 1).text) - 1)
                xmax = int(get_and_check(bndbox, 'xmax', 1).text)
                ymax = int(get_and_check(bndbox, 'ymax', 1).text)

                bbox.write(filename + ' {} {} {} {} {}\n'
                           .format(label_index, xmin, ymin, xmax, ymax))
            print('※ 第{:3d}個xml檔案完成'.format(n))
            print('※ 剩{:3d}個需轉換'.format(total - n))
            print("-" * 35)

# ----------------------------------Step2----------------------------------
# 將圖片依照比例分配train與val
def train_val_split(source, ratio):
    """Randomly split the entries of ``<source>/images`` into two lists.

    Args:
        source: Dataset root directory containing an ``images`` folder.
        ratio: Fraction of the entries that goes into the first list; the
            count is ``int(number_of_entries * ratio)``.

    Returns:
        A ``(train_list, val_list)`` tuple of file names (not full paths).
    """
    names = os.listdir(os.path.join(source, 'images'))
    # Shuffle in place so the cut below yields a random partition.
    random.shuffle(names)
    cut = int(len(names) * ratio)
    return names[:cut], names[cut:]

# 轉換coco格式dataset
def transfer_and_save_coco(source, split_list, dataset, phase):
    """Fill ``dataset`` with COCO records for ``split_list`` and save as JSON.

    Reads ``<source>/annos.txt`` (one box per line:
    ``imageName classId xMin yMin xMax yMax``), appends one entry to
    ``dataset['images']`` per file name in ``split_list`` and one entry to
    ``dataset['annotations']`` per annos line belonging to that image, then
    writes ``dataset`` to ``<source>/annotations/<phase>.json``.

    Args:
        source: Dataset root containing ``annos.txt`` and ``images``.
        split_list: Image file names to include in this split.
        dataset: Dict with list-valued ``'images'`` and ``'annotations'``
            keys; it is mutated in place.
        phase: Base name of the JSON file (without extension).

    Raises:
        OSError: If ``cv2.imread`` cannot read one of the images.
    """
    # Index the annotation lines by image name once, instead of rescanning
    # and re-splitting the whole file for every image.
    boxes_by_image = {}
    with open(os.path.join(source, 'annos.txt')) as tr:
        for line_no, line in enumerate(tr):
            parts = line.strip().split()
            if not parts:
                # A blank line has no image name to match against.
                continue
            boxes_by_image.setdefault(parts[0], []).append((line_no, parts))

    for k, index in enumerate(split_list):
        # Read the image with OpenCV only to obtain its size.
        image_path = os.path.join(source, 'images', index)
        im = cv2.imread(image_path)
        if im is None:
            # imread signals failure by returning None rather than raising.
            raise OSError('Can not read image: {}'.format(image_path))
        img_height, img_width = im.shape[:2]

        dataset['images'].append({'file_name': index,
                                  'id': k,
                                  'width': img_width,
                                  'height': img_height})

        for line_no, parts in boxes_by_image.get(index, ()):
            cls_id = parts[1]
            x1, y1, x2, y2 = (float(value) for value in parts[2:6])
            box_width = max(0, x2 - x1)
            box_height = max(0, y2 - y1)
            dataset['annotations'].append({
                'area': box_width * box_height,
                # COCO boxes are [x_min, y_min, width, height].
                'bbox': [x1, y1, box_width, box_height],
                'category_id': int(cls_id),
                # 1-based: pycocotools' COCOeval stores matched annotation
                # ids in zero-initialised arrays, so an id of 0 would be
                # indistinguishable from "no match".
                'id': line_no + 1,
                'image_id': k,
                'iscrowd': 0,
                # Only needed for segmentation; a rectangle mask would be
                # [[x1, y1, x2, y1, x2, y2, x1, y2]] (clockwise from the
                # top-left corner), together with 'ignore': 0.
                'segmentation': []
            })

        print('   {} images handled'.format(k + 1))

    # Save the JSON file.
    folder = os.path.join(source, 'annotations')
    os.makedirs(folder, exist_ok=True)
    json_name = os.path.join(folder, '{}.json'.format(phase))
    with open(json_name, 'w') as f:
        json.dump(dataset, f)

# 生成train與val之coco格式json檔
def txt_to_coco_json(source, classes, split_list, phase):
    """Build an empty COCO "instances" dataset and hand it to the converter.

    Args:
        source: Dataset root directory, forwarded to
            ``transfer_and_save_coco``.
        classes: Ordered class names; category ids are assigned from 1.
        split_list: Image file names belonging to this split.
        phase: Name used in the progress output and forwarded to
            ``transfer_and_save_coco``.
    """
    info = {'description': '', 'url': '', 'version': '1.0',
            'year': 2022, 'contributor': 'James',
            'date_created': ''}

    # Map every class name to its 1-based category id.
    categories = [
        {'id': class_id, 'name': class_name, 'supercategory': 'mark'}
        for class_id, class_name in enumerate(classes, 1)
    ]

    # Container for image info and labels of the whole split.
    dataset = {'info': info,
               'categories': categories,
               'annotations': [],
               'images': [],
               'type': 'instances'}

    # Convert the split and store it as JSON.
    print('※ 開始轉換{}'.format(phase))
    transfer_and_save_coco(source, split_list, dataset, phase)
    print('※ {}.json Done'.format(phase))


# 移動圖片到train與val資料夾
def split_images_to_train_and_val(source, train_list, val_list):
    """Move images from ``<source>/images`` into the two split folders.

    Args:
        source: Dataset root directory.
        train_list: File names to move into ``<source>/train2017``.
        val_list: File names to move into ``<source>/val2017``.
    """
    plan = (('train2017', train_list), ('val2017', val_list))

    # Make sure both target folders exist before anything is moved.
    for subdir, _ in plan:
        target = os.path.join(source, subdir)
        if not os.path.exists(target):
            os.makedirs(target)

    # Move the training images first, then the validation images.
    for subdir, names in plan:
        destination = os.path.join(source, subdir, '')
        for name in names:
            shutil.move(source + '/images/' + name, destination)
    print('移動圖片到train與val資料夾 Done')
if __name__ == '__main__':
    source = './dataests3'
    # Load the class names (whitespace-separated) from classes.txt. This must
    # stay a module-level name: transfer_xml_to_annos is called below without
    # an explicit class list and looks it up globally.
    with open(os.path.join(source, 'classes.txt')) as f:
        classes = f.read().strip().split()

    # [Step 1] Convert the XML files into annos.txt. Each line holds
    # imageName, classId, xMin, yMin, xMax, yMax; one line per bbox, with
    # classId counted from 1.
    print('【Step1】xml轉annos.txt')

    # Output path of annos.txt.
    saveDir = os.path.join(source, 'annos.txt')
    # The converter appends to the file, so remove the result of an earlier
    # run; otherwise every box would be listed twice.
    if os.path.exists(saveDir):
        os.remove(saveDir)

    # Collect the XML files. Anything else in the folder is skipped because
    # it would make the XML parser fail; sorting keeps the line order of
    # annos.txt reproducible.
    xmlDir = os.path.join(source, 'xmls/')
    xmlPath = [xmlDir + i for i in sorted(os.listdir(xmlDir))
               if i.lower().endswith('.xml')]

    # Convert the XML files into annos.txt.
    transfer_xml_to_annos(xmlPath, saveDir)
    print('=' * 60)

    # [Step 2] Convert the labels to COCO format and store them as JSON.
    # The dataset folder contains images (pictures), annos.txt (bbox
    # records), classes.txt (class list) and annotations (JSON output).
    print('【Step2】annos.txt轉coco，並以json格式儲存')

    # Distribute the images between train and val by ratio.
    train_list, val_list = train_val_split(source, 0.9)

    # Generate the COCO JSON files for train and val.
    txt_to_coco_json(source, classes, train_list, 'instances_train2017')
    print('-' * 35)
    txt_to_coco_json(source, classes, val_list, 'instances_val2017')
    print('-' * 35)

    # Move the images into the train and val folders.
    split_images_to_train_and_val(source, train_list, val_list)
    print('程式執行結束')